This project uses Canadian General Election data to create visualizations. The original dataset can be found here: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ABFNSQ
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
# Read the federal candidates CSV (path relative to the working directory)
# into a tibble.
Election_data <- readr::read_csv(file = "federal-candidates-2021-10-20.csv")
## Rows: 46526 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (22): type_elxn, elected, candidate_name, incumbent, gender, country_bi...
## dbl (8): id, parliament, year, birth_year, riding_id, votes, percent_votes...
## date (1): edate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the type and the first few values of every column.
Election_data %>% str()
## spc_tbl_ [46,526 × 31] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ id : num [1:46526] 26093 13011 27974 18040 1798 ...
## $ parliament : num [1:46526] 1 1 1 1 1 1 1 1 1 1 ...
## $ year : num [1:46526] 1867 1867 1867 1867 1867 ...
## $ type_elxn : chr [1:46526] "General" "General" "General" "General" ...
## $ elected : chr [1:46526] "Elected" "Elected" "Not elected" "Elected" ...
## $ candidate_name : chr [1:46526] "POWER," "JONES," "SHANNON, S.L." "KIRKPATRICK, Thomas" ...
## $ edate : Date[1:46526], format: "1867-08-07" "1867-08-07" ...
## $ incumbent : chr [1:46526] "Not incumbent" "Not incumbent" "Not incumbent" "Not incumbent" ...
## $ gender : chr [1:46526] "M" "M" "M" "M" ...
## $ birth_year : num [1:46526] 1815 1824 NA 1805 1829 ...
## $ country_birth : chr [1:46526] NA NA NA NA ...
## $ lgbtq2_out : chr [1:46526] NA NA NA NA ...
## $ indigenousorigins : chr [1:46526] "Other" "Other" "Other" "Other" ...
## $ occupation : chr [1:46526] "merchant" "merchant" "lawyer" "lawyer" ...
## $ lawyer : chr [1:46526] "Other" "Other" "Lawyer" "Lawyer" ...
## $ censuscategory : chr [1:46526] "Sales and service occupations" "Sales and service occupations" "Occupations in education, law and social, community and government services" "Occupations in education, law and social, community and government services" ...
## $ riding_id : num [1:46526] NA NA NA NA NA NA NA NA NA NA ...
## $ riding : chr [1:46526] "HALIFAX" "HALIFAX" "HALIFAX" "FRONTENAC" ...
## $ province : chr [1:46526] "Nova Scotia" "Nova Scotia" "Nova Scotia" "Ontario" ...
## $ votes : num [1:46526] 2367 2381 2154 1242 NA ...
## $ percent_votes : num [1:46526] 26.1 26.3 23.8 64.2 100 ...
## $ acclaimed : chr [1:46526] "Not acclaimed" "Not acclaimed" "Not acclaimed" "Not acclaimed" ...
## $ switcher : chr [1:46526] "Switcher" "Switcher" "Not switcher" "Not switcher" ...
## $ multiple_candidacy: chr [1:46526] "Single" "Single" "Single" "Single" ...
## $ party_raw : chr [1:46526] "Anti-Confederate" "Labour" "Unknown" "Conservative" ...
## $ party_minor_group : chr [1:46526] "Third_Party" "Labour" "Independent" "Conservative" ...
## $ party_major_group : chr [1:46526] "Third_Party" "Third_Party" "Independent" "Conservative" ...
## $ gov_party_raw : chr [1:46526] "Conservative" "Conservative" "Conservative" "Conservative" ...
## $ gov_minor_group : chr [1:46526] "Conservative" "Conservative" "Conservative" "Conservative" ...
## $ gov_major_group : chr [1:46526] "Conservative" "Conservative" "Conservative" "Conservative" ...
## $ num_candidates : num [1:46526] 4 4 4 2 1 2 2 1 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. id = col_double(),
## .. parliament = col_double(),
## .. year = col_double(),
## .. type_elxn = col_character(),
## .. elected = col_character(),
## .. candidate_name = col_character(),
## .. edate = col_date(format = ""),
## .. incumbent = col_character(),
## .. gender = col_character(),
## .. birth_year = col_double(),
## .. country_birth = col_character(),
## .. lgbtq2_out = col_character(),
## .. indigenousorigins = col_character(),
## .. occupation = col_character(),
## .. lawyer = col_character(),
## .. censuscategory = col_character(),
## .. riding_id = col_double(),
## .. riding = col_character(),
## .. province = col_character(),
## .. votes = col_double(),
## .. percent_votes = col_double(),
## .. acclaimed = col_character(),
## .. switcher = col_character(),
## .. multiple_candidacy = col_character(),
## .. party_raw = col_character(),
## .. party_minor_group = col_character(),
## .. party_major_group = col_character(),
## .. gov_party_raw = col_character(),
## .. gov_minor_group = col_character(),
## .. gov_major_group = col_character(),
## .. num_candidates = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary: quantiles and NA counts for numeric/date columns,
# length and class for character columns.
Election_data %>% summary()
## id parliament year type_elxn
## Min. : 1 Min. : 1.00 Min. :1867 Length:46526
## 1st Qu.: 6478 1st Qu.:21.00 1st Qu.:1949 Class :character
## Median :22031 Median :32.00 Median :1980 Mode :character
## Mean :18474 Mean :28.64 Mean :1970
## 3rd Qu.:28613 3rd Qu.:38.00 3rd Qu.:2004
## Max. :36769 Max. :44.00 Max. :2021
##
## elected candidate_name edate incumbent
## Length:46526 Length:46526 Min. :1867-08-07 Length:46526
## Class :character Class :character 1st Qu.:1949-06-27 Class :character
## Mode :character Mode :character Median :1980-02-18 Mode :character
## Mean :1970-11-14
## 3rd Qu.:2004-06-28
## Max. :2021-09-20
##
## gender birth_year country_birth lgbtq2_out
## Length:46526 Min. :1798 Length:46526 Length:46526
## Class :character 1st Qu.:1864 Class :character Class :character
## Mode :character Median :1908 Mode :character Mode :character
## Mean :1902
## 3rd Qu.:1942
## Max. :1998
## NA's :34250
## indigenousorigins occupation lawyer censuscategory
## Length:46526 Length:46526 Length:46526 Length:46526
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## riding_id riding province votes
## Min. : 6001 Length:46526 Length:46526 Min. : 0
## 1st Qu.:24048 Class :character Class :character 1st Qu.: 1094
## Median :35052 Mode :character Mode :character Median : 4058
## Mean :35464 Mean : 6975
## 3rd Qu.:47006 3rd Qu.:10371
## Max. :62001 Max. :71535
## NA's :35487 NA's :691
## percent_votes acclaimed switcher multiple_candidacy
## Min. : 0.000 Length:46526 Length:46526 Length:46526
## 1st Qu.: 4.268 Class :character Class :character Class :character
## Median : 23.124 Mode :character Mode :character Mode :character
## Mean : 26.351
## 3rd Qu.: 44.760
## Max. :100.000
## NA's :73
## party_raw party_minor_group party_major_group gov_party_raw
## Length:46526 Length:46526 Length:46526 Length:46526
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## gov_minor_group gov_major_group num_candidates
## Length:46526 Length:46526 Min. : 1.000
## Class :character Class :character 1st Qu.: 3.000
## Mode :character Mode :character Median : 5.000
## Mean : 4.771
## 3rd Qu.: 6.000
## Max. :21.000
##
# Flag every row that is an exact copy of an earlier row, then count the flags.
duplicate_test <- duplicated(Election_data)
sum(duplicate_test)
## [1] 0
# Drop the row identifier and the birth year (34,250 of its 46,526 values are
# missing). The columns are removed by name rather than by position (1 and 10)
# so this step keeps removing the intended columns even if the column order of
# the CSV changes.
Election_data1 <- Election_data %>%
  select(-id, -birth_year)
summary(Election_data1)
## parliament year type_elxn elected
## Min. : 1.00 Min. :1867 Length:46526 Length:46526
## 1st Qu.:21.00 1st Qu.:1949 Class :character Class :character
## Median :32.00 Median :1980 Mode :character Mode :character
## Mean :28.64 Mean :1970
## 3rd Qu.:38.00 3rd Qu.:2004
## Max. :44.00 Max. :2021
##
## candidate_name edate incumbent gender
## Length:46526 Min. :1867-08-07 Length:46526 Length:46526
## Class :character 1st Qu.:1949-06-27 Class :character Class :character
## Mode :character Median :1980-02-18 Mode :character Mode :character
## Mean :1970-11-14
## 3rd Qu.:2004-06-28
## Max. :2021-09-20
##
## country_birth lgbtq2_out indigenousorigins occupation
## Length:46526 Length:46526 Length:46526 Length:46526
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## lawyer censuscategory riding_id riding
## Length:46526 Length:46526 Min. : 6001 Length:46526
## Class :character Class :character 1st Qu.:24048 Class :character
## Mode :character Mode :character Median :35052 Mode :character
## Mean :35464
## 3rd Qu.:47006
## Max. :62001
## NA's :35487
## province votes percent_votes acclaimed
## Length:46526 Min. : 0 Min. : 0.000 Length:46526
## Class :character 1st Qu.: 1094 1st Qu.: 4.268 Class :character
## Mode :character Median : 4058 Median : 23.124 Mode :character
## Mean : 6975 Mean : 26.351
## 3rd Qu.:10371 3rd Qu.: 44.760
## Max. :71535 Max. :100.000
## NA's :691 NA's :73
## switcher multiple_candidacy party_raw party_minor_group
## Length:46526 Length:46526 Length:46526 Length:46526
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## party_major_group gov_party_raw gov_minor_group gov_major_group
## Length:46526 Length:46526 Length:46526 Length:46526
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## num_candidates
## Min. : 1.000
## 1st Qu.: 3.000
## Median : 5.000
## Mean : 4.771
## 3rd Qu.: 6.000
## Max. :21.000
##
# Add factor-typed copies of the categorical character columns so that
# summary() reports level counts for them instead of only length and class.
# The original character columns are kept unchanged.
Election_data2 <- Election_data %>%
  mutate(
    Party_new = as.factor(party_raw),
    Gender_new = as.factor(gender),
    Elect_new = as.factor(elected),
    Incumbent_new = as.factor(incumbent),
    Country_birth_new = as.factor(country_birth),
    lgbtq2_new = as.factor(lgbtq2_out),
    indigenous_new = as.factor(indigenousorigins),
    occupation_new = as.factor(occupation),
    Lawyer_new = as.factor(lawyer),
    census_new = as.factor(censuscategory),
    riding_new = as.factor(riding),
    province_new = as.factor(province)
  )
summary(Election_data2)
## id parliament year type_elxn
## Min. : 1 Min. : 1.00 Min. :1867 Length:46526
## 1st Qu.: 6478 1st Qu.:21.00 1st Qu.:1949 Class :character
## Median :22031 Median :32.00 Median :1980 Mode :character
## Mean :18474 Mean :28.64 Mean :1970
## 3rd Qu.:28613 3rd Qu.:38.00 3rd Qu.:2004
## Max. :36769 Max. :44.00 Max. :2021
##
## elected candidate_name edate incumbent
## Length:46526 Length:46526 Min. :1867-08-07 Length:46526
## Class :character Class :character 1st Qu.:1949-06-27 Class :character
## Mode :character Mode :character Median :1980-02-18 Mode :character
## Mean :1970-11-14
## 3rd Qu.:2004-06-28
## Max. :2021-09-20
##
## gender birth_year country_birth lgbtq2_out
## Length:46526 Min. :1798 Length:46526 Length:46526
## Class :character 1st Qu.:1864 Class :character Class :character
## Mode :character Median :1908 Mode :character Mode :character
## Mean :1902
## 3rd Qu.:1942
## Max. :1998
## NA's :34250
## indigenousorigins occupation lawyer censuscategory
## Length:46526 Length:46526 Length:46526 Length:46526
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## riding_id riding province votes
## Min. : 6001 Length:46526 Length:46526 Min. : 0
## 1st Qu.:24048 Class :character Class :character 1st Qu.: 1094
## Median :35052 Mode :character Mode :character Median : 4058
## Mean :35464 Mean : 6975
## 3rd Qu.:47006 3rd Qu.:10371
## Max. :62001 Max. :71535
## NA's :35487 NA's :691
## percent_votes acclaimed switcher multiple_candidacy
## Min. : 0.000 Length:46526 Length:46526 Length:46526
## 1st Qu.: 4.268 Class :character Class :character Class :character
## Median : 23.124 Mode :character Mode :character Mode :character
## Mean : 26.351
## 3rd Qu.: 44.760
## Max. :100.000
## NA's :73
## party_raw party_minor_group party_major_group gov_party_raw
## Length:46526 Length:46526 Length:46526 Length:46526
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## gov_minor_group gov_major_group num_candidates
## Length:46526 Length:46526 Min. : 1.000
## Class :character Class :character 1st Qu.: 3.000
## Mode :character Mode :character Median : 5.000
## Mean : 4.771
## 3rd Qu.: 6.000
## Max. :21.000
##
## Party_new Gender_new Elect_new
## Liberal :10457 2 : 2 Elected :12386
## New Democratic Party : 5918 F : 6585 Not elected:34140
## Progressive Conservative : 5015 M :39938
## Conservative : 3373 NA's: 1
## Green Party of Canada : 2606
## Conservative Party of Canada: 2281
## (Other) :16876
## Incumbent_new Country_birth_new lgbtq2_new indigenous_new
## Incumbent : 9337 Canada : 462 Not Out: 2003 Indigenous: 236
## Not incumbent:37121 India : 10 Out : 134 Other :46290
## NA's : 68 Hong Kong : 8 NA's :44389
## United States: 8
## Lebanon : 5
## (Other) : 50
## NA's :45983
## occupation_new Lawyer_new
## lawyer : 3940 Lawyer: 5807
## farmer : 2624 Other :36328
## teacher : 1536 NA's : 4391
## parliamentarian: 1240
## merchant : 1195
## (Other) :31870
## NA's : 4121
## census_new
## Occupations in education, law and social, community and government services:13060
## Business, finance and administration occupations : 6233
## Sales and service occupations : 4288
## Natural resources, agriculture and related production occupations : 3511
## Members of Parliament : 2404
## (Other) :11434
## NA's : 5596
## riding_new province_new
## VICTORIA : 307 Ontario :15965
## HALIFAX : 228 Quebec :13026
## VANCOUVER CENTRE: 186 British Columbia: 4606
## YORK WEST : 164 Alberta : 3401
## HOCHELAGA : 155 Manitoba : 2279
## VANCOUVER EAST : 152 Saskatchewan : 2106
## (Other) :45334 (Other) : 5143
# Narrow the data to a small set of analysis columns.
Election_data3 <- select(
  Election_data2,
  year, elected, gender, riding, province, votes, percent_votes, party_raw
)
# Keep only the candidates who won their riding.
Election_data4 <- filter(Election_data3, elected == "Elected")
summary(Election_data4)
## year elected gender riding
## Min. :1867 Length:12386 Length:12386 Length:12386
## 1st Qu.:1917 Class :character Class :character Class :character
## Median :1958 Mode :character Mode :character Mode :character
## Mean :1952
## 3rd Qu.:1993
## Max. :2021
##
## province votes percent_votes party_raw
## Length:12386 Min. : 43 Min. : 20.18 Length:12386
## Class :character 1st Qu.: 5081 1st Qu.: 45.70 Class :character
## Mode :character Median :12442 Median : 52.06 Mode :character
## Mean :13808 Mean : 54.44
## 3rd Qu.:20994 3rd Qu.: 58.86
## Max. :71535 Max. :100.00
## NA's :652 NA's :34
# Count elected candidates per party, province and election year, largest
# counts first. `.groups = "drop"` makes summarise() return an ungrouped
# tibble, so no grouping leaks into later steps and the "grouped output"
# message is no longer emitted.
Election_data5 <- Election_data4 %>%
  group_by(party_raw, province, year) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count))
summary(Election_data5)
## party_raw province year count
## Length:1670 Length:1670 Min. :1867 Min. : 1.000
## Class :character Class :character 1st Qu.:1903 1st Qu.: 1.000
## Mode :character Mode :character Median :1945 Median : 2.000
## Mean :1944 Mean : 7.417
## 3rd Qu.:1980 3rd Qu.: 7.000
## Max. :2021 Max. :101.000
# Bar chart: number of elected candidates per raw party label, with the party
# names rotated 90 degrees on the x axis.
histo <- ggplot(data = Election_data4) +
  geom_bar(mapping = aes(x = party_raw), na.rm = FALSE) +
  theme(axis.text.x = element_text(angle = 90))

# Bubble chart: elected-candidate counts over time, one panel per province,
# with point size showing the count and colour showing the party.
bubble <- ggplot(data = Election_data5) +
  geom_point(
    mapping = aes(x = year, y = count, size = count, color = party_raw)
  ) +
  facet_wrap(vars(province)) +
  scale_size(range = c(0.1, 3), name = "Count")
# Circular bar plot of elected candidates in the 2021 election, one bar per
# party/province combination.
Election_data6 <- Election_data5 %>%
  filter(year == 2021)

# Number the bars 1..n. The length comes from the data rather than a
# hard-coded 1:30, which fails as soon as the 2021 subset does not contain
# exactly 30 rows. `consecutive_numbers` duplicates `id`; it is kept only
# because the original script created it.
Election_data6$consecutive_numbers <- seq_len(nrow(Election_data6))
Election_data6$id <- Election_data6$consecutive_numbers

label_data <- Election_data6
number_of_bar <- nrow(label_data)

# Rotation of each label so that it points outward along its bar; the 0.5
# offset places the angle at the middle of the bar's slot.
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels in the second half of the circle would be upside down, so they are
# turned by 180 degrees and right-aligned instead of left-aligned.
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# Label text: party name followed by province.
label_data$combined <- paste(label_data$party_raw, label_data$province)

p <- ggplot(label_data, aes(x = as.factor(id), y = count, alpha = count)) +
  geom_bar(stat = "identity", fill = "Green") +
  # The negative lower limit leaves an empty circle in the middle of the
  # polar plot; the upper limit leaves room for the labels beyond the bars.
  ylim(-100, 200) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    # Negative margins trim the blank border around the plot; adjust this
    # value if the outer labels get truncated.
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar(start = 0) +
  # Labels sit 10 units beyond the end of each bar and do not inherit the
  # bar aesthetics (in particular the count-based alpha).
  geom_text(
    data = label_data,
    aes(x = id, y = count + 10, label = combined, hjust = hjust),
    color = "black",
    fontface = "bold",
    alpha = 0.6,
    size = 2.5,
    angle = label_data$angle,
    inherit.aes = FALSE
  )